1 Setup

library(rlang)
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stats)
library(ggpubr)
## Loading required package: ggplot2
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
## 
##     get_legend
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.5     ✓ purrr   0.3.4
## ✓ tidyr   1.1.4     ✓ forcats 0.5.1
## ✓ readr   2.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x purrr::%@%()         masks rlang::%@%()
## x purrr::as_function() masks rlang::as_function()
## x dplyr::filter()      masks stats::filter()
## x purrr::flatten()     masks rlang::flatten()
## x purrr::flatten_chr() masks rlang::flatten_chr()
## x purrr::flatten_dbl() masks rlang::flatten_dbl()
## x purrr::flatten_int() masks rlang::flatten_int()
## x purrr::flatten_lgl() masks rlang::flatten_lgl()
## x purrr::flatten_raw() masks rlang::flatten_raw()
## x purrr::invoke()      masks rlang::invoke()
## x dplyr::lag()         masks stats::lag()
## x purrr::list_along()  masks rlang::list_along()
## x purrr::modify()      masks rlang::modify()
## x purrr::prepend()     masks rlang::prepend()
## x purrr::splice()      masks rlang::splice()
#library(MCMC.OTU)
#install.packages("remotes")
#remotes::install_github("Jtrachsel/funfuns")
library("funfuns")

Read in data

#setwd('/Users/nicolakriefall/Google Drive (nicfall@bu.edu)/Moorea_revisions/mr16s_revised/analysis/03.community_comp')
setwd("~/nicfall@bu.edu - Google Drive/My Drive/Moorea_revisions/mr16s_revised/analysis/03.community_comp")

samdf <- read.csv("mr16s_sampledata_plusneg copy.csv",header=TRUE)
load("taxa2 copy.Rdata")

#load("ps.clean.Rdata")
#load("ps.rare.Rdata")
load("ps.rare.trim.Rdata")
load("ps.trim.Rdata")

Rename ASVs to be more informative

tax <- as.data.frame(ps.rare.trim@tax_table@.Data)
## Loading required package: phyloseq
## Warning: package 'phyloseq' was built under R version 4.0.3
tax.clean <- data.frame(row.names = row.names(tax),
                        Kingdom = str_replace(tax[,1], "D_0__",""),
                        Phylum = str_replace(tax[,2], "D_1__",""),
                        Class = str_replace(tax[,3], "D_2__",""),
                        Order = str_replace(tax[,4], "D_3__",""),
                        Family = str_replace(tax[,5], "D_4__",""),
                        Genus = str_replace(tax[,6], "D_5__",""),
                        Species = str_replace(tax[,7], "D_6__",""),
                        stringsAsFactors = FALSE)
tax.clean[is.na(tax.clean)] <- ""

for (i in 1:7){ tax.clean[,i] <- as.character(tax.clean[,i])}
####### Fill holes in the tax table
tax.clean[is.na(tax.clean)] <- ""
for (i in 1:nrow(tax.clean)){
  if (tax.clean[i,2] == ""){
    kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "")
    tax.clean[i, 2:7] <- kingdom
  } else if (tax.clean[i,3] == ""){
    phylum <- paste("Phylum_", tax.clean[i,2], sep = "")
    tax.clean[i, 3:7] <- phylum
  } else if (tax.clean[i,4] == ""){
    class <- paste("Class_", tax.clean[i,3], sep = "")
    tax.clean[i, 4:7] <- class
  } else if (tax.clean[i,5] == ""){
    order <- paste("Order_", tax.clean[i,4], sep = "")
    tax.clean[i, 5:7] <- order
  } else if (tax.clean[i,6] == ""){
    family <- paste("Family_", tax.clean[i,5], sep = "")
    tax.clean[i, 6:7] <- family
  } else if (tax.clean[i,7] == ""){
    tax.clean$Species[i] <- paste("Genus",tax.clean$Genus[i], sep = "_")
  }
}

tax_table(ps.rare.trim) <- as.matrix(tax.clean)

2 Core vs. accessory

2.1 Core

## Warning: package 'microbiome' was built under R version 4.0.3
## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2020 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## Attaching package: 'microbiome'
## The following object is masked from 'package:vegan':
## 
##     diversity
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:base':
## 
##     transform
pseq.core <- core(ps.rare.trim, detection = 0, prevalence = .7)
pseq.core #9 taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 9 taxa and 84 samples ]
## sample_data() Sample Data:       [ 84 samples by 9 sample variables ]
## tax_table()   Taxonomy Table:    [ 9 taxa by 7 taxonomic ranks ]
#saving
#core.tax <- data.frame(pseq.core@tax_table)
#write.csv(core.tax,"core.taxa.csv")

ps_glom <- tax_glom(pseq.core, "Genus")
ps0 <- transform_sample_counts(ps_glom, function(x) x / sum(x))
ps1 <- merge_samples(ps0, "site_zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps2 <- transform_sample_counts(ps1, function(x) x / sum(x))

plot_bar(ps2, fill="Genus")+
  geom_bar(stat="identity")+
  theme_cowplot()+
  scale_fill_brewer(palette="BrBG")

#ggsave(file="core.bar.pdf",width=8)

#plot_ordination(pseq.core,ordination="")
plot_ordination(pseq.core,ordinate(pseq.core,"PCoA", "bray"),color="site", shape="site")+
  stat_ellipse()+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Site",values=c(8,4,9))#+

  #xlab("Axis 1 (42.6%)")+
  #ylab("Axis 2 (24.1%)")+
  #ggtitle("Rarefied")

#by site
ps.core.mnw <- subset_samples(pseq.core,site=="MNW")
ps.core.mse <- subset_samples(pseq.core,site=="MSE")
ps.core.tnw <- subset_samples(pseq.core,site=="TNW")

plot_ordination(ps.core.mnw,ordinate(ps.core.mnw,"PCoA", "bray"),color="zone", shape="zone")+
  stat_ellipse()+
  theme_cowplot()#+

  #scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  #scale_shape_manual(name="Site",values=c(8,4,9))#+
  #xlab("Axis 1 (42.6%)")+
  #ylab("Axis 2 (24.1%)")+
  #ggtitle("Rarefied")
# calculating core abundances #
core.sqs <- tax_table(pseq.core)
core.sqs.ids <- row.names(core.sqs)
core.sqs.ids
## [1] "sq1"  "sq2"  "sq4"  "sq7"  "sq8"  "sq10" "sq14" "sq21" "sq23"
ps.rare.trim.rel <- transform_sample_counts(ps.rare.trim, function(x) x / sum(x))
seq.rare.rel <- data.frame(otu_table(ps.rare.trim.rel))
tax.core <- tax_table(ps.rare.trim.rel)

seq.core <- seq.rare.rel %>% select(c(core.sqs.ids))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(core.sqs.ids)` instead of `core.sqs.ids` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
core.rel <- data.frame(colMeans(seq.core))

total.rel <- data.frame(colMeans(seq.rare.rel))
total.rel.ordered <- data.frame(total.rel[order(-total.rel$colMeans.seq.rare.rel.),,drop=FALSE])

Core stats

seq.core <- data.frame(otu_table(pseq.core))

dist.core <- vegdist(seq.core)
samdf.core <- data.frame(sample_data(pseq.core))
row.names(samdf.core)==row.names(seq.core)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.core,samdf.core$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00243 0.0024279  0.0787 0.7797
## Residuals 82 2.52881 0.0308392
plot(bet.all) #very much overlap, not sig

bet.all <- betadisper(dist.core,samdf.core$site)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value Pr(>F)
## Groups     2 0.14541 0.072703  2.2715 0.1097
## Residuals 81 2.59250 0.032006
plot(bet.all) #very much overlap, not sig

adonis(seq.core ~ site, data=samdf.core, permutations=999)
## 
## Call:
## adonis(formula = seq.core ~ site, data = samdf.core, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## site       2     1.210 0.60498  4.3351 0.09669  0.002 **
## Residuals 81    11.304 0.13955         0.90331          
## Total     83    12.514                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.core ~ site/zone, data=samdf.core, permutations=999)
## 
## Call:
## adonis(formula = seq.core ~ site/zone, data = samdf.core, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2    1.2100 0.60498  4.8302 0.09669  0.001 ***
## site:zone  3    1.5343 0.51142  4.0832 0.12261  0.001 ***
## Residuals 78    9.7695 0.12525         0.78070           
## Total     83   12.5137                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.core ~ zone, strata=samdf.core$site,data=samdf.core, permutations=999)
## 
## Call:
## adonis(formula = seq.core ~ zone, data = samdf.core, permutations = 999,      strata = samdf.core$site) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.0911 0.09110 0.60134 0.00728   0.55
## Residuals 82   12.4226 0.15149         0.99272       
## Total     83   12.5137                 1.00000
pairwise.adonis(seq.core, factors = samdf.core$site, permutations = 999)
##        pairs  F.Model         R2 p.value p.adjusted
## 1 MNW vs MSE 3.404324 0.05828891   0.030      0.030
## 2 MNW vs TNW 2.306130 0.04169754   0.088      0.088
## 3 MSE vs TNW 7.078557 0.11589267   0.005      0.005
#MNW only marginally different than TNW surprisingly

#### by site - MNW ####
seq.core.mnw <- data.frame(otu_table(ps.core.mnw))

dist.core.mnw <- vegdist(seq.core.mnw)
samdf.core.mnw <- data.frame(sample_data(ps.core.mnw))
row.names(samdf.core.mnw)==row.names(seq.core.mnw)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.core.mnw,samdf.core.mnw$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value Pr(>F)
## Groups     1 0.00192 0.001918  0.0477 0.8289
## Residuals 26 1.04649 0.040249
plot(bet.all) #very much overlap, not sig

adonis(seq.core.mnw ~ zone,data=samdf.core.mnw, permutations=999)
## 
## Call:
## adonis(formula = seq.core.mnw ~ zone, data = samdf.core.mnw,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.0248 0.024787 0.17921 0.00685  0.903
## Residuals 26    3.5961 0.138310         0.99315       
## Total     27    3.6209                  1.00000
#not sig

2.2 Accessory

ps.rare.trim.otu <- data.frame(ps.rare.trim@otu_table)
core.tax <- data.frame(pseq.core@tax_table)
core.ids <- c(rownames(core.tax))
ps.rare.trim.acc.otu <- ps.rare.trim.otu[,!colnames(ps.rare.trim.otu) %in% core.ids ]
row.names(samdf) <- samdf$id

#remake phyloseq object
ps.acc <- phyloseq(otu_table(ps.rare.trim.acc.otu, taxa_are_rows=FALSE), 
                         sample_data(samdf), 
                         tax_table(taxa2))
ps.acc #214 taxa accessory

plot_ordination(ps.acc,ordinate(ps.acc,"PCoA", "bray"),color="site", shape="site")+
  stat_ellipse()+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Site",values=c(8,4,9))#+
  #xlab("Axis 1 (42.6%)")+
  #ylab("Axis 2 (24.1%)")+
  #ggtitle("Rarefied")
#ggsave(file="pcoa.acc.all.pdf")

#by site
ps.acc.mnw <- subset_samples(ps.acc,site=="MNW")
ps.acc.mse <- subset_samples(ps.acc,site=="MSE")
ps.acc.tnw <- subset_samples(ps.acc,site=="TNW")

gg.pcoa.acc.mnw <- plot_ordination(ps.acc.mnw,ordinate(ps.acc.mnw,"PCoA", "bray"),color="zone", shape="zone")+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))

gg.pcoa.acc.mse <- plot_ordination(ps.acc.mse,ordinate(ps.acc.mse,"PCoA", "bray"),color="zone", shape="zone")+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))

gg.pcoa.acc.tnw <- plot_ordination(ps.acc.tnw,ordinate(ps.acc.tnw,"PCoA", "bray"),color="zone", shape="zone")+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))

ggarrange(gg.pcoa.acc.mnw,gg.pcoa.acc.mse,gg.pcoa.acc.tnw,nrow=1,common.legend=T,legend="right")
#ggsave("pcoa.acc.zone.pdf",width=11,height=3)

#### by site - MNW ####
seq.acc.mnw <- data.frame(otu_table(ps.acc.mnw))

dist.acc.mnw <- vegdist(seq.acc.mnw)
samdf.acc.mnw <- data.frame(sample_data(ps.acc.mnw))
row.names(samdf.acc.mnw)==row.names(seq.acc.mnw)

bet.all <- betadisper(dist.acc.mnw,samdf.acc.mnw$zone)
anova(bet.all)
plot(bet.all) #very much overlap, not sig

adonis(seq.acc.mnw ~ zone,data=samdf.acc.mnw, permutations=999)
# Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
# zone       1    0.7993 0.79929  2.1125 0.07514  0.002 **
# Residuals 26    9.8374 0.37836         0.92486          
# Total     27   10.6367                 1.00000          
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

#### by site - MSE ####
seq.acc.mse <- data.frame(otu_table(ps.acc.mse))

dist.acc.mse <- vegdist(seq.acc.mse)
samdf.acc.mse <- data.frame(sample_data(ps.acc.mse))
row.names(samdf.acc.mse)==row.names(seq.acc.mse)

bet.all <- betadisper(dist.acc.mse,samdf.acc.mse$zone)
anova(bet.all)
plot(bet.all) 
# Df   Sum Sq   Mean Sq F value  Pr(>F)  
# Groups     1 0.021763 0.0217633  4.5724 0.04169 *
# Residuals 27 0.128513 0.0047597                  
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

adonis(seq.acc.mse ~ zone,data=samdf.acc.mse, permutations=999)
# Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
# zone       1    0.8113 0.81126  2.2489 0.07689  0.004 **
# Residuals 27    9.7400 0.36074         0.92311          
# Total     28   10.5513                 1.00000 

#### by site - TNW ####
seq.acc.tnw <- data.frame(otu_table(ps.acc.tnw))

dist.acc.tnw <- vegdist(seq.acc.tnw)
samdf.acc.tnw <- data.frame(sample_data(ps.acc.tnw))
row.names(samdf.acc.tnw)==row.names(seq.acc.tnw)

bet.all <- betadisper(dist.acc.tnw,samdf.acc.tnw$zone)
anova(bet.all)
plot(bet.all) #ns                

adonis(seq.acc.tnw ~ zone,data=samdf.acc.tnw, permutations=999)
#  Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
# zone       1    0.8429 0.84287  2.2735 0.08336  0.002 **
# Residuals 25    9.2685 0.37074         0.91664          
# Total     26   10.1113                 1.00000          

3 Bar plots [needs fixing]

3.1 All, by Phylum

# ps.sz <- merge_samples(ps.rare.trim, "site_zone")
# ps.rel.sz <- transform_sample_counts(ps.sz, function(x) x / sum(x))
# plot_bar(ps.rel.sz, fill="Class")+
#   geom_bar(stat="identity")

ps.all.tab <- psmelt(ps.rare.trim)%>%
  filter(!is.na(Abundance))%>%
  group_by(site,zone,site_zone,Phylum,OTU)%>%
  summarize_at("Abundance",mean)

ps.all.tab$site[ps.all.tab$site == "MNW"] <- "Mo'orea NW"
ps.all.tab$site[ps.all.tab$site == "MSE"] <- "Mo'orea SE"
ps.all.tab$site[ps.all.tab$site == "TNW"] <- "Tahiti NW"

ps.all.tab$zone[ps.all.tab$zone == "Forereef"] <- "FR"
ps.all.tab$zone[ps.all.tab$zone == "Backreef"] <- "BR"

gg.bar.all.phy <- ggplot(ps.all.tab,aes(x=zone,y=Abundance,fill=Phylum))+
  geom_bar(stat="identity")+
  theme_cowplot()+
  #theme(legend.position="none")+
  xlab('Reef zone')+  
  facet_wrap(~site)

gg.bar.all.phy
#ggsave(gg.bar.all,file="bac.bar.all.pdf",height=4)
pa <- psmelt(ps.all)
tb <- psmelt(ps.all)%>%
  filter(!is.na(Abundance))%>%
  group_by(site,zone,site_zone,Class,OTU)%>%
  summarize_at("Abundance",mean)

tb$zone <- gsub("out","FR",tb$zone)
tb$zone <- gsub("in","BR",tb$zone)

tb$site <- gsub("MNW","Mo'orea NW",tb$site)
tb$site <- gsub("MSE","Mo'orea SE",tb$site)
tb$site <- gsub("TNW","Tahiti NW",tb$site)

quartz()
ggplot(tb,aes(x=zone,y=Abundance,fill=Class))+
  geom_bar(stat="identity")+
  theme_cowplot()+
  #theme(legend.position="none")+
  xlab('Reef zone')+  
  facet_wrap(~site)

3.2 All, by Class

ps.sz <- merge_samples(ps.rare.trim, "site_zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.rel.sz <- transform_sample_counts(ps.sz, function(x) x / sum(x))
ps.rel.sz@sam_data$site <- c("a","b","c","a","b","c")
ps.rel.sz@sam_data$zone <- c("a","b","c","a","b","c")
plot_bar(ps.rel.sz,fill="Class")+
  geom_bar(stat="identity")+
  facet_wrap(~site*zone)

ps.all.tab <- psmelt(ps.rare.trim)%>%
  filter(!is.na(Abundance))%>%
  group_by(site,zone,site_zone,Class,OTU)%>%
  summarize_at("Abundance",mean)

ps.all.tab$site[ps.all.tab$site == "MNW"] <- "Mo'orea NW"
ps.all.tab$site[ps.all.tab$site == "MSE"] <- "Mo'orea SE"
ps.all.tab$site[ps.all.tab$site == "TNW"] <- "Tahiti NW"

ps.all.tab$zone[ps.all.tab$zone == "Forereef"] <- "FR"
ps.all.tab$zone[ps.all.tab$zone == "Backreef"] <- "BR"

gg.bar.all.cla <- ggplot(ps.all.tab,aes(x=zone,y=Abundance,fill=Class))+
  geom_bar(stat="identity")+
  theme_cowplot()+
  #theme(legend.position="none")+
  xlab('Reef zone')+  
  facet_wrap(~site)

gg.bar.all.cla

#ggsave(gg.bar.all,file="bac.bar.all.pdf",height=4)

4 PCOA plots

4.1 Plots - site

4.1.1 Rarefied

ord <- ordinate(ps.rare.trim, "PCoA", "bray")
gg.pcoa.site.rare <- plot_ordination(ps.rare.trim, ord,color="site", shape="site")+
  stat_ellipse()+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Site",values=c(8,4,9))#+
  #xlab("Axis 1 (42.6%)")+
  #ylab("Axis 2 (24.1%)")+
  #ggtitle("Rarefied")
gg.pcoa.site.rare

Stats

Help on adonis (here)[https://thebiobucket.blogspot.com/2011/04/assumptions-for-permanova-with-adonis.html#more]

seq.rare <- data.frame(otu_table(ps.rare.trim))

dist.rare <- vegdist(seq.rare)
samdf.rare <- data.frame(sample_data(ps.rare.trim))
row.names(samdf.rare)==row.names(seq.rare)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
bet.all <- betadisper(dist.rare,samdf.rare$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00107 0.0010749  0.0514 0.8213
## Residuals 82 1.71631 0.0209306
plot(bet.all) #very much overlap, not sig

bet.all <- betadisper(dist.rare,samdf.rare$site)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value Pr(>F)
## Groups     2 0.10726 0.053628  2.3453 0.1023
## Residuals 81 1.85220 0.022867
plot(bet.all) #very much overlap, not sig

adonis(seq.rare ~ site, data=samdf.rare, permutations=999)
## 
## Call:
## adonis(formula = seq.rare ~ site, data = samdf.rare, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2    1.4833 0.74165  4.2386 0.09474  0.001 ***
## Residuals 81   14.1730 0.17498         0.90526           
## Total     83   15.6563                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.rare ~ site/zone, data=samdf.rare, permutations=999)
## 
## Call:
## adonis(formula = seq.rare ~ site/zone, data = samdf.rare, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2    1.4833 0.74165  4.6756 0.09474  0.001 ***
## site:zone  3    1.8003 0.60011  3.7832 0.11499  0.001 ***
## Residuals 78   12.3727 0.15862         0.79027           
## Total     83   15.6563                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.rare ~ zone, strata=samdf.rare$site,data=samdf.rare, permutations=999)
## 
## Call:
## adonis(formula = seq.rare ~ zone, data = samdf.rare, permutations = 999,      strata = samdf.rare$site) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.2025 0.20253  1.0747 0.01294  0.288
## Residuals 82   15.4538 0.18846         0.98706       
## Total     83   15.6563                 1.00000
pairwise.adonis(seq.rare, factors = samdf.rare$site, permutations = 999)
##        pairs  F.Model         R2 p.value p.adjusted
## 1 MNW vs MSE 3.863638 0.06563709   0.006      0.006
## 2 MNW vs TNW 2.474825 0.04461168   0.041      0.041
## 3 MSE vs TNW 6.216217 0.10323162   0.001      0.001
#all significantly different

4.1.2 Relative abundance

ps.trim.rel <- transform_sample_counts(ps.trim, function(x) x / sum(x))
ord.rel <- ordinate(ps.trim.rel, "PCoA", "bray")
gg.pcoa.site <- plot_ordination(ps.trim.rel, ord.rel,color="site", shape="site")+
  stat_ellipse()+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Site",values=c(8,4,9))+
  xlab("Axis 1 (44.1%)")+
  ylab("Axis 2 (23%)")+
  ggtitle("Relative abundance")
gg.pcoa.site

Stats

seq.trim <- data.frame(otu_table(ps.trim.rel))

dist.trim <- vegdist(seq.trim)
samdf.trim <- data.frame(sample_data(ps.trim))
row.names(samdf.trim)==row.names(seq.trim)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE
bet.all <- betadisper(dist.trim,samdf.trim$zone)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00004 0.0000422  0.0017 0.9668
## Residuals 90 2.17370 0.0241522
#permutest(bet.all, pairwise = FALSE, permutations = 999)
plot(bet.all) #very much overlap, not sig

bet.all <- betadisper(dist.trim,samdf.trim$site)
anova(bet.all)
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value  Pr(>F)  
## Groups     2 0.16261 0.081306  3.4989 0.03445 *
## Residuals 89 2.06812 0.023237                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.all) #very much overlap, not sig

adonis(seq.trim ~ site, data=samdf.trim, permutations=999)
## 
## Call:
## adonis(formula = seq.trim ~ site, data = samdf.trim, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2    1.5696 0.78480  4.6469 0.09455  0.001 ***
## Residuals 89   15.0308 0.16889         0.90545           
## Total     91   16.6004                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.trim ~ site/zone, data=samdf.trim, permutations=999)
## 
## Call:
## adonis(formula = seq.trim ~ site/zone, data = samdf.trim, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## site       2    1.5696 0.78480  5.0583 0.09455  0.001 ***
## site:zone  3    1.6880 0.56267  3.6266 0.10168  0.001 ***
## Residuals 86   13.3428 0.15515         0.80376           
## Total     91   16.6004                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(seq.trim ~ zone, strata=samdf.trim$site,data=samdf.trim, permutations=999)
## 
## Call:
## adonis(formula = seq.trim ~ zone, data = samdf.trim, permutations = 999,      strata = samdf.trim$site) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)
## zone       1    0.2076 0.20757  1.1396 0.0125  0.274
## Residuals 90   16.3929 0.18214         0.9875       
## Total     91   16.6004                 1.0000
pairwise.adonis(seq.trim, factors = samdf.trim$site, permutations = 999)
##        pairs  F.Model         R2 p.value p.adjusted
## 1 MNW vs MSE 4.059614 0.06541475   0.002      0.002
## 2 MNW vs TNW 2.631393 0.04201397   0.039      0.039
## 3 MSE vs TNW 7.077436 0.10551142   0.003      0.003
#all significantly different

4.1.3 Plot - both

They look super similar

ggarrange(gg.pcoa.site.rare,gg.pcoa.site,labels="AUTO",common.legend=TRUE,legend="right")

4.2 Plots - reef zones

ps.mnw.rel <- subset_samples(ps.trim.rel,site=="MNW")
ps.mnw <- subset_samples(ps.rare.trim,site=="MNW")

ps.mse.rel <- subset_samples(ps.trim.rel,site=="MSE")
ps.mse <- subset_samples(ps.rare.trim,site=="MSE")

ps.tnw.rel <- subset_samples(ps.trim.rel,site=="TNW")
ps.tnw <- subset_samples(ps.rare.trim,site=="TNW")

4.2.1 Mo’orea NW

Relative abundance - appears total overlap

ord.mnw.rel <- ordinate(ps.mnw.rel, "PCoA", "bray")
gg.mnw.rel <- plot_ordination(ps.mnw.rel, ord.mnw.rel,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Mo'orea NW")+
  xlab("Axis 1 (38.8%)")+
  ylab("Axis 2 (29.8%)")+
  theme(axis.text=element_text(size=10))
gg.mnw.rel

Stats

seq.mnw.rel <- data.frame(otu_table(ps.mnw.rel))
samdf.mnw.rel <- data.frame(sample_data(ps.mnw.rel))
row.names(samdf.mnw.rel)==row.names(seq.mnw.rel)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mnw.rel <- vegdist(seq.mnw.rel)
bet.mnw.rel <- betadisper(dist.mnw.rel,samdf.mnw.rel$zone,bias.adjust = TRUE,type="median")
anova(bet.mnw.rel) #not sig
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00407 0.0040681  0.1711 0.6823
## Residuals 28 0.66588 0.0237815
#permutest(bet.mnw.rel, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mnw.rel)

adonis(seq.mnw.rel ~ zone, data=samdf.mnw.rel, permutations=999) #not sig
## 
## Call:
## adonis(formula = seq.mnw.rel ~ zone, data = samdf.mnw.rel, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.1165 0.11650 0.68244 0.02379  0.594
## Residuals 28    4.7800 0.17072         0.97621       
## Total     29    4.8965                 1.00000

Rarefied

Slightly less overlap - but maybe still non-significant?

ord.mnw <- ordinate(ps.mnw, "PCoA", "bray")
gg.mnw <- plot_ordination(ps.mnw, ord.mnw,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Mo'orea NW")+
  xlab("Axis 1 (38.9%)")+
  ylab("Axis 2 (30.4%)")+
  theme(axis.text=element_text(size=10))
gg.mnw

Stats

seq.mnw.rare <- data.frame(otu_table(ps.mnw))
samdf.mnw.rare <- data.frame(sample_data(ps.mnw))
row.names(samdf.mnw.rare)==row.names(seq.mnw.rare)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mnw.rare <- vegdist(seq.mnw.rare)
bet.mnw.rare <- betadisper(dist.mnw.rare,samdf.mnw.rare$zone,bias.adjust = TRUE,type="median")
anova(bet.mnw.rare) #not sig
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00111 0.0011097   0.048 0.8284
## Residuals 26 0.60154 0.0231360
#permutest(bet.mnw.rare, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mnw.rare)

adonis(seq.mnw.rare ~ zone, data=samdf.mnw.rare, permutations=999) #not sig
## 
## Call:
## adonis(formula = seq.mnw.rare ~ zone, data = samdf.mnw.rare,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## zone       1    0.1169 0.11692 0.66528 0.02495  0.628
## Residuals 26    4.5693 0.17574         0.97505       
## Total     27    4.6862                 1.00000

4.2.2 Mo’orea SE

Relative abundance - appears to separate more than MNW

ord.mse.rel <- ordinate(ps.mse.rel, "PCoA", "bray")
gg.mse.rel <- plot_ordination(ps.mse.rel, ord.mse.rel,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Mo'orea SE")+
  xlab("Axis 1 (39.6%)")+
  ylab("Axis 2 (27.2%)")+
  theme(axis.text=element_text(size=10))
gg.mse.rel

Stats

seq.mse.rel <- data.frame(otu_table(ps.mse.rel))
samdf.mse.rel <- data.frame(sample_data(ps.mse.rel))
row.names(samdf.mse.rel)==row.names(seq.mse.rel)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mse.rel <- vegdist(seq.mse.rel)
bet.mse.rel <- betadisper(dist.mse.rel,samdf.mse.rel$zone,bias.adjust = TRUE,type="median")
anova(bet.mse.rel) #not sig
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00686 0.0068585  0.2188 0.6436
## Residuals 28 0.87784 0.0313514
#permutest(bet.mse.rel, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mse.rel)

adonis(seq.mse.rel ~ zone, data=samdf.mse.rel, permutations=999) #p < 0.01**
## 
## Call:
## adonis(formula = seq.mse.rel ~ zone, data = samdf.mse.rel, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## zone       1    0.7797 0.77967   4.198 0.13038  0.007 **
## Residuals 28    5.2002 0.18572         0.86962          
## Total     29    5.9799                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rarefied

Equivalent

ord.mse <- ordinate(ps.mse, "PCoA", "bray")
gg.mse <- plot_ordination(ps.mse, ord.mse,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Mo'orea SE")+
  xlab("Axis 1 (38.1%)")+
  ylab("Axis 2 (27.8%)")+
  theme(axis.text=element_text(size=10))
gg.mse

Stats

Same story

seq.mse.rare <- data.frame(otu_table(ps.mse))
samdf.mse.rare <- data.frame(sample_data(ps.mse))
row.names(samdf.mse.rare)==row.names(seq.mse.rare)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.mse.rare <- vegdist(seq.mse.rare)
bet.mse.rare <- betadisper(dist.mse.rare,samdf.mse.rare$zone,bias.adjust = TRUE,type="median")
anova(bet.mse.rare) #not sig
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups     1 0.00799 0.0079868  0.2595 0.6146
## Residuals 27 0.83094 0.0307756
#permutest(bet.mse.rare, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.mse.rare)

adonis(seq.mse.rare ~ zone, data=samdf.mse.rare, permutations=999) #p < 0.01**
## 
## Call:
## adonis(formula = seq.mse.rare ~ zone, data = samdf.mse.rare,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## zone       1    0.6971 0.69710  3.7137 0.12091  0.013 *
## Residuals 27    5.0681 0.18771         0.87909         
## Total     28    5.7652                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.2.3 Tahiti NW

Relative abundance - looks very silly

ord.tnw.rel <- ordinate(ps.tnw.rel, "PCoA", "bray")
gg.tnw.rel <- plot_ordination(ps.tnw.rel, ord.tnw.rel,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Tahiti NW")+
  #xlab("Axis 1 (60.2%)")+
  #ylab("Axis 2 (9.7%)")+
  theme(axis.text=element_text(size=10))
gg.tnw.rel
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

Stats

seq.tnw.rel <- data.frame(otu_table(ps.tnw.rel))
samdf.tnw.rel <- data.frame(sample_data(ps.tnw.rel))
row.names(samdf.tnw.rel)==row.names(seq.tnw.rel)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
dist.tnw.rel <- vegdist(seq.tnw.rel)
bet.tnw.rel <- betadisper(dist.tnw.rel,samdf.tnw.rel$zone,bias.adjust = TRUE,type="median")
anova(bet.tnw.rel) #p < 0.05*
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq Mean Sq F value  Pr(>F)  
## Groups     1 0.19049 0.19049   7.243 0.01152 *
## Residuals 30 0.78899 0.02630                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#permutest(bet.tnw.rel, pairwise = FALSE, permutations = 999) #says the same thing
plot(bet.tnw.rel)

adonis(seq.tnw.rel ~ zone, data=samdf.tnw.rel, permutations=999) #p < 0.01**
## 
## Call:
## adonis(formula = seq.tnw.rel ~ zone, data = samdf.tnw.rel, permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)   
## zone       1    0.7918 0.79183  7.0644 0.1906  0.002 **
## Residuals 30    3.3626 0.11209         0.8094          
## Total     31    4.1544                 1.0000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Rarefied

Equivalent

ord.tnw <- ordinate(ps.tnw, "PCoA", "bray")
gg.tnw <- plot_ordination(ps.tnw, ord.tnw,color="zone", shape="zone")+
  geom_point(size=2)+
  stat_ellipse()+
  theme_cowplot()+
  scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
  scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
  guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
  ggtitle("Tahiti NW")+
  xlab("Axis 1 (59.4%)")+
  ylab("Axis 2 (10.8%)")+
  theme(axis.text=element_text(size=10))
gg.tnw

Stats

seq.tnw.rare <- data.frame(otu_table(ps.tnw))
samdf.tnw.rare <- data.frame(sample_data(ps.tnw))
row.names(samdf.tnw.rare)==row.names(seq.tnw.rare)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.tnw.rare <- vegdist(seq.tnw.rare)
bet.tnw.rare <- betadisper(dist.tnw.rare,samdf.tnw.rare$zone,bias.adjust = TRUE,type="median")
anova(bet.tnw.rare) #p < 0.01**
## Analysis of Variance Table
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq F value   Pr(>F)   
## Groups     1 0.20256 0.202560  8.2641 0.008141 **
## Residuals 25 0.61277 0.024511                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permutest(bet.tnw.rare, pairwise = FALSE, permutations = 999) #says the same thing
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)   
## Groups     1 0.20256 0.202560 8.2641    999  0.003 **
## Residuals 25 0.61277 0.024511                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.tnw.rare)

adonis(seq.tnw.rare ~ zone, data=samdf.tnw.rare, permutations=999) #p < 0.01**
## 
## Call:
## adonis(formula = seq.tnw.rare ~ zone, data = samdf.tnw.rare,      permutations = 999) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## zone       1    0.9863 0.98632   9.015 0.26503  0.001 ***
## Residuals 25    2.7352 0.10941         0.73497           
## Total     26    3.7215                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3 Summary

  • No differences in results between relative abundance & rarefied plots/stats
  • Pairwise adonis significant between all three sites
  • Adonis significant across reef zones at TNW & MSE, but not MNW
  • Beta dispersion not significantly different except between TNW F & B (can definitely see it)

Relative abundance

ggarrange(gg.mnw.rel,gg.mse.rel,gg.tnw.rel,nrow=1,common.legend=TRUE,legend="right",labels="AUTO")
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

Rarefied

ggarrange(gg.mnw,gg.mse,gg.tnw,nrow=1,common.legend=TRUE,legend="right",labels=c("(a)","(b)","(c)"))

#ggsave("16s.pcoa.pdf",height=2.5,width=8)

5 ANCOM

Tutorial here

5.1 Setup

library(readr)
library(tidyverse)
#library(dplyr)
library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
#install.packages('compositions')
library(compositions)
## Welcome to compositions, a package for compositional data analysis.
## Find an intro with "? compositions"
## 
## Attaching package: 'compositions'
## The following objects are masked from 'package:stats':
## 
##     anova, cor, cov, dist, var
## The following objects are masked from 'package:base':
## 
##     %*%, norm, scale, scale.default
source("/Volumes/GoogleDrive/My Drive/Moorea_revisions/mr16s_revised/analysis/scripts/ancom_v2.1.R")

setwd("/Volumes/GoogleDrive/My Drive/Moorea_revisions/mr16s_revised/analysis/03.community_comp")
otu_data_unt <- data.frame(ps.trim@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure

meta_data = data.frame(ps.trim@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)

# Step 1: Data preprocessing

feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var, 
                                   out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info

# Step 2: ANCOM

main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = "~ 1 | site"

res.all = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method, 
            alpha, adj_formula, rand_formula)

#saveRDS(res.mnw,file="ancom.res.mnw.RDS")

# Step 3: Volcano Plot

# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")

# Annotation data
dat_ann = data.frame(x = min(res.all$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")

fig = res.all$fig +  
  geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") + 
  geom_text(data = dat_ann, aes(x = x, y = y, label = label), 
            size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig  

5.2 ANCOM by site

Note: shouldn’t be done on rarefied according to authors

Ran these three chunks once, then loading in data below

5.2.1 MNW

ps.mnw <- subset_samples(ps.trim,site=="MNW")

otu_data_unt <- data.frame(ps.mnw@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure

meta_data = data.frame(ps.mnw@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)

# Step 1: Data preprocessing

feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var, 
                                   out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info

# Step 2: ANCOM

main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL

res.mnw = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method, 
            alpha, adj_formula, rand_formula)

#saveRDS(res.mnw,file="ancom.res.mnw.RDS")

# Step 3: Volcano Plot

# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")

# Annotation data
dat_ann = data.frame(x = min(res.mnw$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")

fig = res.mnw$fig +  
  geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") + 
  geom_text(data = dat_ann, aes(x = x, y = y, label = label), 
            size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig  

5.2.2 MSE

ps.mse <- subset_samples(ps.trim,site=="MSE")

otu_data_unt <- data.frame(ps.mse@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure

meta_data = data.frame(ps.mse@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)

# Step 1: Data preprocessing

feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var, 
                                   out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info

# Step 2: ANCOM

main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL

res.mse = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method, 
            alpha, adj_formula, rand_formula)

#saveRDS(res.mse,file="ancom.res.mse.RDS")

# Step 3: Volcano Plot

# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")

# Annotation data
dat_ann = data.frame(x = min(res.mse$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")

fig = res.mse$fig +  
  geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") + 
  geom_text(data = dat_ann, aes(x = x, y = y, label = label), 
            size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig  

5.2.3 TNW

ps.tnw <- subset_samples(ps.trim,site=="TNW")

otu_data_unt <- data.frame(ps.tnw@otu_table)
otu_data<- data.frame(t(otu_data_unt))
#might need to make the sample names an actual row yet, not sure

meta_data = data.frame(ps.tnw@sam_data)
meta_data = meta_data %>% rename(Sample.ID = id)

# Step 1: Data preprocessing

feature_table = otu_data; sample_var = "Sample.ID"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 1000; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var, 
                                   out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info

# Step 2: ANCOM

main_var = "zone"; p_adj_method = "BH"; alpha = 0.05
adj_formula = NULL; rand_formula = NULL

res.tnw = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method, 
            alpha, adj_formula, rand_formula)

#saveRDS(res.tnw,file="ancom.res.tnw.RDS")

# Step 3: Volcano Plot

# Number of taxa except structural zeros
n_taxa = ifelse(is.null(struc_zero), nrow(feature_table), sum(apply(struc_zero, 1, sum) == 0))
# Cutoff values for declaring differentially abundant taxa
cut_off = c(0.9 * (n_taxa -1), 0.8 * (n_taxa -1), 0.7 * (n_taxa -1), 0.6 * (n_taxa -1))
names(cut_off) = c("detected_0.9", "detected_0.8", "detected_0.7", "detected_0.6")

# Annotation data
dat_ann = data.frame(x = min(res.tnw$fig$data$x), y = cut_off["detected_0.7"], label = "W[0.7]")

fig = res.tnw$fig +  
  geom_hline(yintercept = cut_off["detected_0.7"], linetype = "dashed") + 
  geom_text(data = dat_ann, aes(x = x, y = y, label = label), 
            size = 4, vjust = -0.5, hjust = 0, color = "orange", parse = TRUE)
fig  

5.3 Synthesizing ANCOM results

Re-read in data

res.mnw <- readRDS("ancom.res.mnw.RDS")
res.mse <- readRDS("ancom.res.mse.RDS")
res.tnw <- readRDS("ancom.res.tnw.RDS")

Which ones are ‘significant’

mnw.out <- res.mnw$out
mnw.out.sig <- mnw.out[mnw.out$detected_0.6==TRUE,]
#1

mse.out <- res.mse$out
mse.out.sig <- mse.out[mse.out$detected_0.6==TRUE,]
#8

tnw.out <- res.tnw$out
tnw.out.sig <- tnw.out[tnw.out$detected_0.6==TRUE,]
#4

Subset the sig ones in phyloseq

want.mnw <- c(mnw.out.sig$taxa_id)
want.mse <- c(mse.out.sig$taxa_id)
want.tnw <- c(tnw.out.sig$taxa_id)

want <- c(want.mnw,want.mse,want.tnw)

ps.sig.taxa <- subset_taxa(ps.rare.trim,row.names(ps.trim@tax_table) %in% want)
#37 & 13 are in there twice

#looks so cool!
plot_bar(ps.sig.taxa,x="site_zone",y="Abundance",fill="Genus")+
  facet_wrap(~Genus,scales="free")

#ggsave("sig.genus.abun.pdf",width=11)

Heat map attempts

library("microbiomeutilities")
## Warning: replacing previous import 'ggplot2::alpha' by 'microbiome::alpha' when
## loading 'microbiomeutilities'
library("viridis")
## Loading required package: viridisLite
ps.sig.mnw <- subset_samples(ps.sig.taxa,site=="MNW")

ps.sig.mnw.sz <- merge_samples(ps.sig.mnw,"site_zone")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.sig.mnw.sz@sam_data$zone <- c("A","B")

heat.sig.mnw <- plot_taxa_heatmap(ps.sig.mnw.sz,
  subset.top=14,                               
  VariableA = "zone",
  heatcolors=colorRampPalette(colors=c(viridis_pal(direction=-1,option="D")(32)))(30),
  #heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
  transformation = "log10",
  cluster_cols=F,
  cluster_rows=F
)
## Top 14 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and 
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.

#pdf: 3 x 4 in

ps.sig.mse <- subset_samples(ps.sig.taxa,site=="MSE")

ps.sig.mse.sz <- merge_samples(ps.sig.mse,"site_zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.sig.mse.sz@sam_data$zone <- c("A","B")

heat.sig.mse <- plot_taxa_heatmap(ps.sig.mse.sz,
  subset.top=14,                               
  VariableA = "zone",
  heatcolors=colorRampPalette(colors=c(viridis_pal(direction=-1,option="D")(32)))(30),
  #heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
  transformation = "log10",
  cluster_cols=F,
  cluster_rows=F
)
## Top 14 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and 
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.

ps.sig.tnw <- subset_samples(ps.sig.taxa,site=="TNW")

ps.sig.tnw.sz <- merge_samples(ps.sig.tnw,"site_zone")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.sig.tnw.sz@sam_data$zone <- c("A","B")

heat.sig.tnw <- plot_taxa_heatmap(ps.sig.tnw.sz,
  subset.top=14,                               
  VariableA = "zone",
  heatcolors=colorRampPalette(colors=c(viridis_pal(direction=-1,option="D")(32)))(30),
  #heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
  transformation = "log10",
  cluster_cols=F,
  cluster_rows=F
)
## Top 14 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and 
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.

heat.sig <- plot_taxa_heatmap(ps.sig.taxa,
  subset.top=14,                               
  VariableA = "zone",
  heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
  transformation = "log10",
  cluster_cols=T
)

ps.sz <- merge_samples(ps.rare.trim, "site_zone")

ps.sig.taxa.sz <- subset_taxa(ps.sz,row.names(ps.sz@tax_table) %in% want)
#37 & 13 are in there twice

ps.sig.taxa.sz@sam_data$zone <- c("A","B","A","B","A","B")

heat.sig <- plot_taxa_heatmap(ps.sig.taxa.sz,
  subset.top=14,                               
  VariableA = "zone",
  heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
  transformation = "Z-OTU",
  cluster_cols=F
)

6 Heat maps!

#evtools::install_github("microsud/microbiomeutilities")
library("microbiomeutilities")

#package doesn't like the sq level
taxa.tab <- as.data.frame(ps.trim@tax_table)
taxa.tab.nosq <- as.matrix(taxa.tab[,1:7])
seq.trim.tab <- as.data.frame(ps.trim@otu_table)

rownames(samdf) <- samdf$id

ps.trim.nosq <- phyloseq(otu_table(seq.trim.tab, taxa_are_rows=FALSE), 
                         sample_data(samdf), 
                         tax_table(taxa.tab.nosq))



library(RColorBrewer)

heat.sample <- plot_taxa_heatmap(ps.trim.nosq,
  subset.top=223,                               
  VariableA = "site",
  heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
  transformation = "log10",
  cluster_cols=T
)
## Top 223 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and 
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.

ps.trim.nosq.mnw <- subset_samples(ps.trim.nosq,site=="MNW")

heat.mnw <- plot_taxa_heatmap(ps.trim.nosq.mnw,
  subset.top = 20,
  VariableA = "zone",
  heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
  transformation = "log10",
  cluster_cols=T
)
## Top 20 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and 
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.

ps.trim.nosq.mse <- subset_samples(ps.trim.nosq,site=="MSE")

heat.mse <- plot_taxa_heatmap(ps.trim.nosq.mse,
  subset.top = 20,
  VariableA = "zone",
  heatcolors=colorRampPalette(colors=c(viridis_pal(direction=-1,option="D")(32)))(30),
  #heatcolors = colorRampPalette(brewer.pal(n = 7, name = "Purples"))(100),#took out rev() here
  transformation = "log10",
  cluster_cols=T
)
## Top 20 OTUs selected
## log10, if zeros in data then log10(1+x) will be used
## First top taxa were selected and 
## then abundances tranformed to log10(1+X)
## Warning in transform(phyobj1, "log10"): OTU table contains zeroes. Using log10(1
## + x) transform.

ps.sz <- merge_samples(ps.rare.trim, "site_zone")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
ps.sz@sam_data$zone <- c("A","B","A","B","A","B")

heat.all.sz <- plot_taxa_heatmap(ps.sz,
  subset.top=20,                               
  VariableA = "zone",
  heatcolors = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100),
  transformation = "Z-OTU",
  cluster_cols=F
)
## Top 20 OTUs selected
## Warning in transform(x, "log10"): OTU table contains zeroes. Using log10(1 + x)
## transform.
## First top taxa were selected and 
## then abundances tranformed to Z values